An overview of the 4 wilderness areas

Area Overview.

Satellite View.

EDA

From the describe function (output hidden) we see that:

Variable plots

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Elevation)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  ggtitle('Fig 1:  Elevation')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Slope)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  ggtitle('Fig 2:  Slope')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Aspect)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  #geom_jitter(alpha=0.2, width=0.1, height=0.0) +
  ggtitle('Fig 3:  Aspect seems like a poor predictor')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Horizontal_Distance_To_Hydrology)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  ggtitle('Fig 4:  Horiz Dist to Hydro')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Vertical_Distance_To_Hydrology)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  ggtitle('Fig 5:  Vertical Distance to Hydro seems
          like a poor predictor')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Horizontal_Distance_To_Fire_Points)) +
  geom_boxplot(aes(fill=Cover, group=Cover)) +
  ggtitle('Fig x:  Horiz Distance to Fire')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Slope, y=Elevation)) +
  geom_point(alpha=.1)+
  facet_grid(.~forest$Cover) +
  ggtitle('Fig 6:  Elevation vs Slope per Cover category')

forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=Hillshade_Noon)) +
  geom_boxplot(alpha=.1)+
  #facet_grid(.~forest$Cover) +
  ggtitle('Fig 7:  Aspect vs Slope per Cover category')

forest <- forest %>% 
  mutate(all_wild = Wilderness_Area1+Wilderness_Area2+Wilderness_Area3+Wilderness_Area4)
summary(forest$all_wild)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1

Based on this we can see that each tree is assigned to one and only one Wilderness_Area.

Converting Wilderness_Area to a single column:

factors = c(12:15)
wilderness_area = rep(0, length(forest))
for(f in factors){
    colvalues=forest[,f]
    wilderness_area[which(colvalues==1)] = f-11
}
forest$all_wilderness_area <- wilderness_area
summary(forest$all_wilderness_area)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     2.0     3.0     2.8     4.0     4.0
forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=all_wilderness_area)) +
  geom_jitter(alpha=0.1) +
  ggtitle('Fig x:  Wilderness Area seems to be a predictor of Cover')+
  ylab('Wilderness Area')

factors = c(16:55)
soil_type = rep(0, length(forest))
for(f in factors){
    colvalues=forest[,f]
    soil_type[which(colvalues==1)]=soil_type[which(colvalues==1)]+1
}
forest$all_soil_type <- soil_type
max(forest$all_soil_type)
## [1] NA
summary(soil_type)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1       1       1       1       1       1   15061

Based on this we can see that each tree is assigned to one and only one Soil_Type.

Converting Soil_Type to a factor:

factors = c(16:55)
soil_type = rep(0, length(forest))
for(f in factors){
    colvalues=forest[,f]
    soil_type[which(colvalues==1)] = f-15
}
forest$all_soil_type <- soil_type
summary(forest$all_soil_type)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   10.00   17.00   19.17   30.00   40.00
forest %>% 
  mutate(Cover = as.factor(Cover_Type)) %>%
  ggplot(aes(x=Cover, y=all_soil_type)) +
  geom_jitter(alpha=.1)+
  ggtitle('Fig x:  Soil Type seems to be a predictor of Cover')

forest %>% 
  ggplot(aes(x=all_soil_type, y=all_wilderness_area)) +
  geom_jitter(alpha=.1) +
  facet_grid(.~forest$Cover) +
  ggtitle('Fig x:  Soil Type seems to be a predictor of Cover')

Modeling

library(nnet)
forest_tmp <- forest %>%
  mutate(wildarea = as.factor(all_wilderness_area)) %>%
  mutate(soiltype = as.factor(all_soil_type))

mod <- multinom(data=forest_tmp, Cover~Elevation + wildarea)
## # weights:  42 (30 variable)
## initial  value 29422.161454 
## iter  10 value 22489.965485
## iter  20 value 18155.112778
## iter  30 value 15278.359338
## iter  40 value 14201.820304
## iter  50 value 14181.132533
## iter  60 value 14177.416535
## final  value 14177.353532 
## converged
summary(mod)
## Call:
## multinom(formula = Cover ~ Elevation + wildarea, data = forest_tmp)
## 
## Coefficients:
##   (Intercept)    Elevation   wildarea2  wildarea3  wildarea4
## 2    23.65555 -0.007838952   0.6095364  0.0932434  4.8938514
## 3    71.94577 -0.031387408  10.4127513 16.1089596 17.5496549
## 4    83.82780 -0.035079338  13.1048834  2.2783062 14.4994439
## 5    43.15569 -0.014817494 -46.3362704  0.8737884 -2.7748701
## 6    67.17515 -0.030546283  20.5220075 18.8211409 20.3142486
## 7   -56.17451  0.017173359  -1.1246601  0.4832609  0.8281581
## 
## Std. Errors:
##    (Intercept)    Elevation    wildarea2    wildarea3    wildarea4
## 2 1.605222e-04 1.286866e-05 1.803751e-03 3.731080e-02 4.122955e-04
## 3 1.815021e-03 2.344580e-05 3.387814e-10 1.424904e-02 1.606351e-02
## 4 4.498677e-05 2.594585e-05 1.044617e-08 1.057492e-08 4.496550e-05
## 5 1.289519e-04 1.599588e-05 1.348750e-24 2.761592e-02 3.634967e-06
## 6 1.782303e-03 2.299793e-05 8.589032e-07 1.426924e-02 1.605050e-02
## 7 1.679216e-05 1.320445e-05 6.011271e-04 2.191129e-03 6.147223e-13
## 
## Residual Deviance: 28354.71 
## AIC: 28414.71